Objetivo principal:
Objetivos especÃficos:
Determinar el modelo estadÃstico que mejor permita explorar la relación entre el clima y el número de casos reportados de malaria en la ventana temporal del estudio.
Determinar si el efecto del clima en el número de casos reportados de malaria varia en el tiempo según el modelo.
Contamos con una tabla de datos con el número de casos reportados de malaria (falciparum y vivax) por mes en cada distrito de la región de Loreto desde de enero de 2000 a diciembre de 2017. Además, para cada uno de estos registros se tiene el valor mensual de variables climáticas consideradas para el estudio.
Las columnas de la tabla acompañadas de su descripción se muestra continuación:
| Columna | Descripción |
|---|---|
district |
Distrito. |
year |
Año. |
month |
Mes. |
falciparum |
Número de casos de falciparum. |
vivax |
Número de casos de vivax. |
aet |
Evapotranspiración real. |
prcp |
Precipitación |
q |
EscorrentÃa |
soil |
Humedad del suelo. |
tmax |
Temperatura máxima. |
tmin |
Temperatura mÃnima |
water_deficit |
Déficit hÃdrico. |
pop2015 |
Población del distrito en el año 2015. |
province |
Provincia a la que pertenece el distrito. |
region |
Region/Departamento al que pertenece el distrito (Loreto) |
id_district |
ID del distrito. |
################
# Data loading #
################
data_path <- file.path('_data/') # Data directory path
file_name <- 'dt_final.csv' # csv. file name
file_path <- file.path(data_path, file_name) # Full csv. file path
library(tidyverse)
# Column names
col_names <- c(
'district',
'year',
'month',
'falciparum',
'vivax',
'aet',
'prcp',
'q',
'soilm',
'tmax',
'tmin',
'water_deficit',
'loss',
'loss_km2',
'cum_loss_km2',
'diag',
'enviro',
'nets',
'workers',
'pamafro',
'pop2015',
'province',
'region',
'id_district'
)
# Column types
col_types <- readr::cols(
district = col_character(),
year = col_integer(),
month = col_integer(),
falciparum = col_integer(),
vivax = col_integer(),
aet = col_double(),
prcp = col_double(),
q = col_double(),
soilm = col_double(),
tmax = col_double(),
tmin = col_double(),
water_deficit = col_double(),
loss = col_double(),
loss_km2 = col_double(),
cum_loss_km2 = col_double(),
diag = col_integer(),
enviro = col_integer(),
nets = col_integer(),
workers = col_integer(),
pamafro = col_integer(),
pop2015 = col_integer(),
province = col_character(),
region = col_character(),
id_district = col_character()
)
# Read raw csv. file into a data frame
raw_dataset <-
readr::read_csv(
file = file_path,
col_names = col_names,
col_types = col_types,
skip = 1,
locale = readr::locale(encoding = "utf-8")
)
# Create columns
# - report_date:
# Date of cases report in date-time format
# - control:
# Flag column indicating if the PAMAFRO intervention was taken place
# in that date
raw_dataset %>%
dplyr::mutate(
report_date =
lubridate::make_datetime(
year = year,
month = month,
day = 1L # Take 01 as the day
)
) %>%
dplyr::mutate(
control =
ifelse(
# PAMAFRO intervention took place between 2005-10 and 2010-09
test = (report_date >= '2005-09-01' & report_date <= '2010-09-01'),
# >= 2005-09-01 doesn't take 2005-09 and starts with 2005-10 for some
# reason
yes = 1,
no = 0
)
) %>%
dplyr::arrange(report_date) ->
dataset # Final data frame
library(DT)
# Show table
DT::datatable(
data = dataset,
options =
list(
pageLength = 6,
scrollX = TRUE
)
)
dataset %>%
dplyr::group_by(region) %>%
dplyr::summarise(
n = n(), # Number of reports (dates x districts in Loreto)
sum = sum(falciparum), # Sum of reported cases
min = min(falciparum), # Min of reported cases
max = max(falciparum), # Max of reported cases
n_zero_cases = sum(falciparum < 1), # Number of reports with 0 cases
n_zero_cases_percent = round(
x = 100 * sum(falciparum < 1) / n(),
digits = 2
) # % of reports with 0 cases
) %>%
DT::datatable(
options =
list(
pageLength = 6,
scrollX = TRUE
)
)
### Provinces
dataset %>%
dplyr::group_by(province) %>%
dplyr::summarise(
n = n(), # Number of reports (dates x districts in every province)
sum = sum(falciparum), # Sum of reported cases
min = min(falciparum), # Min of reported cases
max = max(falciparum), # Max of reported cases
n_zero_cases = sum(falciparum < 1), # Number of reports with 0 cases
n_zero_cases_percent = round(
x = 100 * sum(falciparum < 1) / n(),
digits = 2
) # % of reports with 0 cases
) %>%
DT::datatable(
options =
list(
pageLength = 6,
scrollX = TRUE
)
)
theme_set(theme_light()) # Light theme
library(scales)
# Data frame with the dates of the PAMAFRO intervention to plot
control_dates <-
dplyr::tibble(
dates = c(
lubridate::make_datetime(
year = 2005,
month = 10,
day = 1L
),
lubridate::make_datetime(
year = 2010,
month = 09,
day = 1L
)
),
type = c(
'start',
'end'
)
)
dataset %>%
dplyr::group_by(
region,
report_date
) %>%
dplyr::summarise(
falciparum = sum(falciparum),
vivax = sum(vivax)
) %>%
tidyr::pivot_longer(
cols = c(
falciparum,
vivax
),
names_to = 'parasite'
) %>%
ggplot(
aes(
x = report_date,
y = value / 1000
)
) +
geom_line(
aes(
colour = parasite
),
size = 0.7
) +
scale_color_manual(
values = c('#00AFBB', '#E7B800')
) +
geom_vline(
data = control_dates,
aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red',
alpha = 0.8
) +
scale_x_datetime(
date_labels = '%Y',
date_breaks = '1 year'
) +
labs(
y = 'Número de casos reportados de malaria (x1000)',
x = NULL
) +
ggtitle(
label = 'Número de casos mensuales reportados de malaria (x1000) por parásito
durante los años 2000 a 2017 en Loreto'
) +
theme(plot.title = element_text(hjust = 0.5))
dataset %>%
dplyr::group_by(
province,
report_date
) %>%
dplyr::summarise(
falciparum = sum(falciparum),
vivax = sum(vivax)
) %>%
tidyr::pivot_longer(
cols = c(
falciparum,
vivax
),
names_to = 'parasite'
) %>%
ggplot(
aes(
x = report_date,
y = value
)
) +
geom_line(
aes(
colour = parasite
),
size = 0.7
) +
scale_color_manual(
values = c('#00AFBB', '#E7B800')
) +
geom_vline(
data = control_dates,
aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red'
) +
scale_x_datetime(
date_labels = '%Y',
date_breaks = '5 year'
) +
labs(
y = 'Número de casos reportados de malaria',
x = NULL
) +
facet_wrap(
facets = . ~ province,
scales = 'free'
) +
ggtitle(
label = 'Número de casos mensuales reportados de malaria por parásito
durante los años 2000 a 2017 en las provincias de Loreto'
) +
theme(plot.title = element_text(hjust = 0.5))
dataset %>%
dplyr::group_by(
district,
report_date
) %>%
dplyr::summarise(
falciparum = sum(falciparum),
vivax = sum(vivax)
) %>%
tidyr::pivot_longer(
cols = c(
falciparum,
vivax
),
names_to = 'parasite'
) %>%
ggplot(
aes(
x = report_date,
y = value
)
) +
geom_line(
aes(
colour = parasite
),
size = 0.7
) +
scale_color_manual(
values = c('#00AFBB', '#E7B800')
) +
geom_vline(
data = control_dates,
aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red'
) +
scale_x_datetime(
date_labels = '%Y',
date_breaks = '5 year'
) +
labs(
y = 'Número de casos reportados de malaria',
x = NULL
) +
facet_wrap(
facets = . ~ district,
scales = 'free'
) +
ggtitle(
label = 'Número de casos mensuales reportados de malaria por parásito
durante los años 2000 a 2017 en los distritos de Loreto'
) +
theme(plot.title = element_text(hjust = 0.5))
# Get static correlations
get_static_correlations <- function(data, cols){
library(tibble, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
# Takes the correlation of all the variables in cols w.r.t. the first one
# in the vector
cor_matrix <- stats::cor(
x = data[cols]
)
cor_vector <- matrix(
data = cor_matrix[1,],
nrow = 1,
dimnames =
list(
NULL,
names(cor_matrix[1,])
)
)
df_correlations <- tibble::as_tibble(
x = cor_vector
)
df_correlations_pivot <-
tidyr::pivot_longer(
data = df_correlations,
cols = cols[2:length(cols)],
names_to = 'variable',
values_to = 'value'
)
return(df_correlations_pivot)
}
# Get correlations in a given window
get_correlations <- function(data, cols, index){
library(lubridate, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)
# Takes the correlation of all the variables in cols w.r.t. the first one
# in the vector
cor_matrix <- stats::cor(
x = data[cols]
)
cor_vector <- matrix(
data = cor_matrix[1,],
nrow = 1,
dimnames =
list(
NULL,
names(cor_matrix[1,])
)
)
date <- lubridate::as_datetime(
x = as.matrix(data[index])
)
df_correlations <- tibble::tibble(
from = min(date),
to = max(date)
)
df_correlations <- tibble::add_column(
.data = df_correlations,
tibble::as_tibble(cor_vector)
)
return(df_correlations)
}
# Get rolling correlations
rolling_correlation <- function(data, cols, index, window) {
library(slider)
library(lubridate, warn.conflicts = FALSE)
date <- lubridate::as_datetime(
x = as.matrix(data[index])
)
correlations <- slider::slide_period_dfr(
.x = data,
.i = date,
.period = 'month',
.f = get_correlations,
cols = cols,
index = index,
.every = 1,
.before = window - 1,
.complete = TRUE
)
return(correlations)
}
# Get plots for rolling correlations
plot_rolling_cor <- function(data, cor_data){
library(ggplot2, warn.conflicts = FALSE)
plot <- ggplot2::ggplot(
data = data,
ggplot2::aes(
x = to,
y = value
)
) +
ggplot2::geom_line() +
ggplot2::scale_x_datetime(
date_labels = '%Y',
date_breaks = '1 year'
) +
ggplot2::geom_vline(
data = control_dates,
ggplot2::aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red'
) +
ggplot2::geom_hline(
data = cor_data,
ggplot2::aes(
yintercept = value
),
linetype = 'dashed',
colour = 'blue'
) +
ggplot2::geom_hline(
data = control_dates,
ggplot2::aes(
yintercept = 0
),
linetype = 'dashed',
alpha = 0.6
) +
ggplot2::facet_wrap(
ncol = 1,
facets = . ~ variable,
scales = 'free'
) +
ggplot2::labs(
y = NULL,
x = NULL
)
print(plot)
}
# Data frame with aggregations by region
dataset %>%
dplyr::group_by(
region, report_date
) %>%
dplyr::summarise(
aet = stats::median(aet),
prcp = stats::median(prcp),
soilm = stats::median(soilm),
tmax = stats::median(tmax),
vivax = sum(vivax),
falciparum = sum(falciparum)
) -> region_dataset
cor_falciparum_region <-
get_static_correlations(
data = region_dataset,
cols = c(
'falciparum',
'aet',
'prcp',
'soilm',
'tmax'
)
)
roll_cor_12_falciparum_region <- rolling_correlation(
data = region_dataset,
cols = c(
'falciparum',
'aet',
'prcp',
'soilm',
'tmax'
),
index = 'report_date',
window = 12
)
# Plot
roll_cor_12_falciparum_region %>%
tidyr::pivot_longer(
cols = c(
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = 'value'
) %>%
plot_rolling_cor(cor_data = cor_falciparum_region)
roll_cor_18_falciparum_region <- rolling_correlation(
data = region_dataset,
cols = c(
'falciparum',
'aet',
'prcp',
'soilm',
'tmax'
),
index = 'report_date',
window = 18
)
# Plot
roll_cor_18_falciparum_region %>%
tidyr::pivot_longer(
cols = c(
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = 'value'
) %>%
plot_rolling_cor(cor_data = cor_falciparum_region)
roll_cor_24_falciparum_region <- rolling_correlation(
data = region_dataset,
cols = c(
'falciparum',
'aet',
'prcp',
'soilm',
'tmax'
),
index = 'report_date',
window = 24
)
# Plot
roll_cor_24_falciparum_region %>%
tidyr::pivot_longer(
cols = c(
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = 'value'
) %>%
plot_rolling_cor(cor_data = cor_falciparum_region)
cor_vivax_region <- get_static_correlations(
data = region_dataset,
cols = c(
'vivax',
'aet',
'prcp',
'soilm',
'tmax'
)
)
roll_cor_12_vivax_region <- rolling_correlation(
data = region_dataset,
cols = c(
'vivax',
'aet',
'prcp',
'soilm',
'tmax'
),
index = 'report_date',
window = 12
)
# Plot
roll_cor_12_vivax_region %>%
tidyr::pivot_longer(
cols = c(
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = 'value'
) %>%
plot_rolling_cor(cor_data = cor_vivax_region)
roll_cor_18_vivax_region <- rolling_correlation(
data = region_dataset,
cols = c(
'vivax',
'aet',
'prcp',
'soilm',
'tmax'
),
index = 'report_date',
window = 18
)
# Plot
roll_cor_18_vivax_region %>%
tidyr::pivot_longer(
cols = c(
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = 'value'
) %>%
plot_rolling_cor(cor_data = cor_vivax_region)
roll_cor_24_vivax_region <- rolling_correlation(
data = region_dataset,
cols = c(
'vivax',
'aet',
'prcp',
'soilm',
'tmax'
),
index = 'report_date',
window = 24
)
# Plot
roll_cor_24_vivax_region %>%
tidyr::pivot_longer(
cols = c(
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = 'value'
) %>%
plot_rolling_cor(cor_data = cor_vivax_region)
# Get coefficients for a given window
get_coefficients <- function(data, formula, index){
library(MASS)
library(lubridate, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)
model <- MASS::glm.nb(
formula = formula,
data = data
)
coefficients <- model$coefficients
n_coefficients <- length(coefficients)
coefficients_vector <- matrix(
data = coefficients,
nrow = 1,
ncol = n_coefficients
)
names_coefficients <- names(coefficients)
names_coefficients[1] <- 'intercept'
dimnames(coefficients_vector) <- list(
NULL,
names_coefficients
)
date <- lubridate::as_datetime(
x = as.matrix(data[index])
)
df_coefficients <- tibble::tibble(
from = min(date),
to = max(date)
)
df_coefficients <- tibble::add_column(
.data = df_coefficients,
tibble::as_tibble(coefficients_vector)
)
return(df_coefficients)
}
# Get coefficients' confidence interval lower bounds
get_coef_confint_lower <- function(data, formula, index){
library(MASS)
library(lubridate, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)
model <- MASS::glm.nb(
formula = formula,
data = data
)
lower_bounds <- stats::confint(model)[,1]
n_coefficients <- length(lower_bounds)
lower_bounds_vector <- matrix(
data = lower_bounds,
nrow = 1,
ncol = n_coefficients
)
names_coefficients <- names(lower_bounds)
names_coefficients[1] <- 'intercept'
dimnames(lower_bounds_vector) <- list(
NULL,
names_coefficients
)
date <- lubridate::as_datetime(
x = as.matrix(data[index])
)
df_lower_bounds <- tibble::tibble(
from = min(date),
to = max(date)
)
df_lower_bounds <- tibble::add_column(
.data = df_lower_bounds,
tibble::as_tibble(lower_bounds_vector)
)
return(df_lower_bounds)
}
# Get coefficients' confidence interval upper bounds
get_coef_confint_upper <- function(data, formula, index){
library(MASS)
library(lubridate, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)
model <- MASS::glm.nb(
formula = formula,
data = data
)
upper_bounds <- stats::confint(model)[,2]
n_coefficients <- length(upper_bounds)
upper_bounds_vector <- matrix(
data = upper_bounds,
nrow = 1,
ncol = n_coefficients
)
names_coefficients <- names(upper_bounds)
names_coefficients[1] <- 'intercept'
dimnames(upper_bounds_vector) <- list(
NULL,
names_coefficients
)
date <- lubridate::as_datetime(
x = as.matrix(data[index])
)
df_upper_bounds <- tibble::tibble(
from = min(date),
to = max(date)
)
df_upper_bounds <- tibble::add_column(
.data = df_upper_bounds,
tibble::as_tibble(upper_bounds_vector)
)
return(df_upper_bounds)
}
# Get rolling coefficients
rolling_nb_regression <- function(data, formula, index, window) {
library(slider)
library(lubridate, warn.conflicts = FALSE)
date <- lubridate::as_datetime(
x = as.matrix(data[index])
)
coefficients <- slider::slide_period_dfr(
.x = data,
.i = date,
.period = 'month',
.f = get_coefficients,
formula = formula,
index = index,
.every = 1,
.before = window - 1,
.complete = TRUE
)
lower_bounds <- slider::slide_period_dfr(
.x = data,
.i = date,
.period = 'month',
.f = get_coef_confint_lower,
formula = formula,
index = index,
.every = 1,
.before = window - 1,
.complete = TRUE
)
upper_bounds <- slider::slide_period_dfr(
.x = data,
.i = date,
.period = 'month',
.f = get_coef_confint_upper,
formula = formula,
index = index,
.every = 1,
.before = window - 1,
.complete = TRUE
)
return(
list(
coefficients = coefficients,
lower_bounds = lower_bounds,
upper_bounds = upper_bounds
)
)
}
pivot_output <- function(data, value){
library(tidyr, warn.conflicts = FALSE)
df <- tidyr::pivot_longer(
data = data,
cols = c(
intercept,
aet,
prcp,
soilm,
tmax
),
names_to = 'variable',
values_to = value
)
return(df)
}
# Rolling regression plot
plot_rolling_regre <- function(data){
library(dplyr, warn.conflicts = FALSE)
library(ggplot2, warn.conflicts = FALSE)
coefficients <- pivot_output(
data = data$coefficients,
value = 'coefficient'
)
lower_bounds <- pivot_output(
data = data$lower_bounds,
value = 'lb'
)
upper_bounds <- pivot_output(
data = data$upper_bounds,
value = 'ub'
)
coefficients %>%
dplyr::inner_join(
y = lower_bounds,
by = c('from', 'to', 'variable')
) %>% dplyr::inner_join(
y = upper_bounds,
by = c('from', 'to', 'variable')
) -> df
plot <- ggplot2::ggplot(
data = df,
ggplot2::aes(
x = to,
y = coefficient
)
) +
ggplot2::geom_ribbon(
mapping = ggplot2::aes(
ymin = lb,
ymax = ub
),
fill = 'grey70',
alpha = 0.8
) +
ggplot2::geom_line(
mapping = ggplot2::aes(
y = coefficient
)
) +
ggplot2::scale_x_datetime(
date_labels = '%Y',
date_breaks = '1 year'
) +
ggplot2::geom_vline(
data = control_dates,
ggplot2::aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red'
) +
ggplot2::geom_hline(
ggplot2::aes(
yintercept = 0
),
linetype = 'dashed',
alpha = 0.6
) +
ggplot2::facet_wrap(
ncol = 1,
facets = . ~ variable,
scales = 'free'
) +
ggplot2::labs(
y = NULL,
x = NULL
)
print(plot)
}
falciparum_nb_model_formula <- stats::as.formula(
falciparum ~ aet + prcp + soilm + tmax
)
roll_regre_12_falciparum_region <- rolling_nb_regression(
data = region_dataset,
formula = falciparum_nb_model_formula,
index = 'report_date',
window = 12
)
# Plot
roll_regre_12_falciparum_region %>%
plot_rolling_regre()
roll_regre_18_falciparum_region <- rolling_nb_regression(
data = region_dataset,
formula = falciparum_nb_model_formula,
index = 'report_date',
window = 18
)
# Plot
roll_regre_18_falciparum_region %>%
plot_rolling_regre()
roll_regre_24_falciparum_region <- rolling_nb_regression(
data = region_dataset,
formula = falciparum_nb_model_formula,
index = 'report_date',
window = 24
)
# Plot
roll_regre_24_falciparum_region %>%
plot_rolling_regre()
vivax_nb_model_formula <- stats::as.formula(
vivax ~ aet + prcp + soilm + tmax
)
roll_regre_12_vivax_region <- rolling_nb_regression(
data = region_dataset,
formula = vivax_nb_model_formula,
index = 'report_date',
window = 12
)
# Plot
roll_regre_12_vivax_region %>%
plot_rolling_regre()
roll_regre_18_vivax_region <- rolling_nb_regression(
data = region_dataset,
formula = vivax_nb_model_formula,
index = 'report_date',
window = 18
)
# Plot
roll_regre_18_vivax_region %>%
plot_rolling_regre()
roll_regre_24_vivax_region <- rolling_nb_regression(
data = region_dataset,
formula = vivax_nb_model_formula,
index = 'report_date',
window = 24
)
# Plot
roll_regre_24_vivax_region %>%
plot_rolling_regre()
region_dataset %>%
dplyr::mutate(
t = as.numeric(report_date)
) -> region_dataset
library(mgcv)
falciparum_gam <- mgcv::gam(
formula = falciparum ~
s(t, by = tmax) +
s(t, by = aet) +
s(t, by = prcp) +
s(t, by = soilm),
family = nb(),
data = region_dataset
)
falciparum_smooth_pred <- mgcv::predict.gam(
object = falciparum_gam,
type = 'terms',
se.fit = TRUE
)
falciparum_fit <- falciparum_smooth_pred$fit
falciparum_model_matrix <- falciparum_gam$model[c(3, 4, 5, 6)]
falciparum_fit_values <- falciparum_fit / falciparum_model_matrix
falciparum_names <- colnames(falciparum_model_matrix)
falciparum_se_fit <- falciparum_smooth_pred$se.fit
colnames(falciparum_se_fit) <- falciparum_names
tibble::tibble(
report_date = region_dataset$report_date,
tibble::as_tibble(falciparum_fit_values)
) %>%
tidyr::pivot_longer(
cols = c(
tmax,
aet,
prcp,
soilm
),
names_to = 'variable',
values_to = 'fit'
) -> df_falciparum_fit_values
tibble::tibble(
report_date = region_dataset$report_date,
tibble::as_tibble(falciparum_se_fit)
) %>%
tidyr::pivot_longer(
cols = c(
tmax,
aet,
prcp,
soilm
),
names_to = 'variable',
values_to = 'se_fit'
) -> df_falciparum_se_fit
df_falciparum_fit_values %>%
dplyr::inner_join(
y = df_falciparum_se_fit,
by = c('report_date', 'variable')
) -> df_falciparum_gam
df_falciparum_gam %>%
ggplot2::ggplot(
ggplot2::aes(
x = report_date,
y = fit
)
) +
ggplot2::geom_line(
ggplot2::aes(
y = fit
)
) +
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = fit - se_fit,
ymax = fit + se_fit
),
fill = 'grey70',
alpha = 0.8
) +
ggplot2::scale_x_datetime(
date_labels = '%Y',
date_breaks = '1 year'
) +
ggplot2::geom_vline(
data = control_dates,
ggplot2::aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red'
) +
ggplot2::geom_hline(
ggplot2::aes(
yintercept = 0
),
linetype = 'dashed',
alpha = 0.6
) +
ggplot2::facet_wrap(
ncol = 1,
facets = . ~ variable,
scales = 'free'
) +
ggplot2::labs(
y = NULL,
x = NULL
)
vivax_gam <- mgcv::gam(
formula = vivax ~
s(t, by = tmax) +
s(t, by = aet) +
s(t, by = prcp) +
s(t, by = soilm),
family = nb(),
data = region_dataset
)
vivax_smooth_pred <- mgcv::predict.gam(
object = vivax_gam,
type = 'terms',
se.fit = TRUE
)
vivax_fit <- vivax_smooth_pred$fit
vivax_model_matrix <- vivax_gam$model[c(3, 4, 5, 6)]
vivax_fit_values <- vivax_fit / vivax_model_matrix
vivax_names <- colnames(vivax_model_matrix)
vivax_se_fit <- vivax_smooth_pred$se.fit
colnames(vivax_se_fit) <- vivax_names
tibble::tibble(
report_date = region_dataset$report_date,
tibble::as_tibble(vivax_fit_values)
) %>%
tidyr::pivot_longer(
cols = c(
tmax,
aet,
prcp,
soilm
),
names_to = 'variable',
values_to = 'fit'
) -> df_vivax_fit_values
tibble::tibble(
report_date = region_dataset$report_date,
tibble::as_tibble(vivax_se_fit)
) %>%
tidyr::pivot_longer(
cols = c(
tmax,
aet,
prcp,
soilm
),
names_to = 'variable',
values_to = 'se_fit'
) -> df_vivax_se_fit
df_vivax_fit_values %>%
dplyr::inner_join(
y = df_vivax_se_fit,
by = c('report_date', 'variable')
) -> df_vivax_gam
df_vivax_gam %>%
ggplot2::ggplot(
ggplot2::aes(
x = report_date,
y = fit
)
) +
ggplot2::geom_line(
ggplot2::aes(
y = fit
)
) +
ggplot2::geom_ribbon(
ggplot2::aes(
ymin = fit - se_fit,
ymax = fit + se_fit
),
fill = 'grey70',
alpha = 0.8
) +
ggplot2::scale_x_datetime(
date_labels = '%Y',
date_breaks = '1 year'
) +
ggplot2::geom_vline(
data = control_dates,
ggplot2::aes(
xintercept = as.numeric(dates)
),
linetype = 'dashed',
colour = 'red'
) +
ggplot2::geom_hline(
ggplot2::aes(
yintercept = 0
),
linetype = 'dashed',
alpha = 0.6
) +
ggplot2::facet_wrap(
ncol = 1,
facets = . ~ variable,
scales = 'free'
) +
ggplot2::labs(
y = NULL,
x = NULL
)